In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import decomposition
from sklearn import datasets
import csv
%run 'preprocessor.ipynb' #our own preprocessor functions
In [2]:
# Read every row of the CSV into memory, then turn the rows into the numeric
# matrix that the PCA cell consumes.
# newline='' is what the csv module documents for files passed to csv.reader:
# without it, newlines embedded inside quoted fields are not parsed correctly.
with open('data_w1w4.csv', 'r', newline='') as f:
    reader = csv.reader(f)
    data = list(reader)

# obtain_data_matrix is not defined in this notebook; presumably it is
# provided by the preprocessor.ipynb run in the first cell — verify there.
matrix = obtain_data_matrix(data)
samples = len(matrix)

# Quick sanity check of what was loaded.
print(f"Number of samples: {samples}")
print(f"First entry: {matrix[0]}")
In [3]:
# Fetch (or create) figure number 1 at 10x6 inches and wipe anything that may
# already be drawn on it.
# NOTE(review): with Jupyter's inline backend, figures are closed at the end of
# each cell, so this sizing may not carry over to plots made in later cells —
# confirm which backend this notebook runs under.
fig = plt.figure(num=1, figsize=(10, 6))
fig.clear()
In [4]:
# Project the data matrix onto its first two principal components.
# No pyplot calls belong in this cell: clearing the current axes here only
# creates an empty Axes, which the inline backend renders as a blank plot.
#
# random_state pins the result for the case where scikit-learn's default
# 'auto' solver selects the randomized SVD; exact solvers ignore it.
pca = decomposition.PCA(n_components=2, random_state=0)

# fit_transform fits the model and returns the projected samples in a single
# pass, replacing the separate fit() and transform() calls on the same data.
# X has one row per sample and one column per retained component.
X = pca.fit_transform(matrix)
In [5]:
# Scatter the samples in the plane spanned by the two components held in X.
# With Jupyter's inline backend, figures are closed at the end of every cell,
# so a figure sized in an earlier cell is no longer the one drawn on here.
# Apply the 10x6 inch size to whichever figure is current; on backends that
# keep the earlier figure alive this simply leaves its size unchanged.
ax = plt.gca()
ax.figure.set_size_inches(10, 6)

ax.scatter(X[:, 0], X[:, 1], edgecolor='k')

# Label the plot so it can be read on its own when the notebook is skimmed.
ax.set_xlabel("Principal component 1")
ax.set_ylabel("Principal component 2")
ax.set_title("PCA projection (2 components)")
plt.show()